#Load in the Packages
library(USAboundaries)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.1.0     v dplyr   1.0.5
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(downloader)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Download the permits CSV into a temporary file, then parse it into a tibble.
csv_temp <- tempfile()
download(
  url = "https://raw.githubusercontent.com/WJC-Data-Science/DTS350/master/permits.csv",
  destfile = csv_temp,
  mode = "wb"
)
permit_data <- read_csv(file = csv_temp)
## Warning: Missing column names filled in: 'X1' [1]
## 
## -- Column specification --------------------------------------------------------
## cols(
##   X1 = col_double(),
##   state = col_double(),
##   StateAbbr = col_character(),
##   county = col_double(),
##   countyname = col_character(),
##   variable = col_character(),
##   year = col_double(),
##   value = col_double()
## )
# Preview the first rows to check how the data was read in.
head(permit_data, n = 6L)
## # A tibble: 6 x 8
##      X1 state StateAbbr county countyname     variable     year value
##   <dbl> <dbl> <chr>      <dbl> <chr>          <chr>       <dbl> <dbl>
## 1     1     1 AL             1 Autauga County All Permits  2010   191
## 2     2     1 AL             1 Autauga County All Permits  2009   110
## 3     3     1 AL             1 Autauga County All Permits  2008   173
## 4     4     1 AL             1 Autauga County All Permits  2007   260
## 5     5     1 AL             1 Autauga County All Permits  2006   347
## 6     6     1 AL             1 Autauga County All Permits  2005   313
# Show the column types and the parsing spec attached to the tibble.
str(object = permit_data)
## spec_tbl_df [327,422 x 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ X1        : num [1:327422] 1 2 3 4 5 6 7 8 9 10 ...
##  $ state     : num [1:327422] 1 1 1 1 1 1 1 1 1 1 ...
##  $ StateAbbr : chr [1:327422] "AL" "AL" "AL" "AL" ...
##  $ county    : num [1:327422] 1 1 1 1 1 1 1 1 1 1 ...
##  $ countyname: chr [1:327422] "Autauga County" "Autauga County" "Autauga County" "Autauga County" ...
##  $ variable  : chr [1:327422] "All Permits" "All Permits" "All Permits" "All Permits" ...
##  $ year      : num [1:327422] 2010 2009 2008 2007 2006 ...
##  $ value     : num [1:327422] 191 110 173 260 347 313 367 283 276 400 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   X1 = col_double(),
##   ..   state = col_double(),
##   ..   StateAbbr = col_character(),
##   ..   county = col_double(),
##   ..   countyname = col_character(),
##   ..   variable = col_character(),
##   ..   year = col_double(),
##   ..   value = col_double()
##   .. )
# Preview the last rows as well.
tail(permit_data, n = 6L)
## # A tibble: 6 x 8
##       X1 state StateAbbr county countyname    variable                year value
##    <dbl> <dbl> <chr>      <dbl> <chr>         <chr>                  <dbl> <dbl>
## 1 327417    56 WY            45 Weston County 2-Unit Multifamily      1980     4
## 2 327418    56 WY            45 Weston County 3 & 4-Unit Multifamily  2004     4
## 3 327419    56 WY            45 Weston County 3 & 4-Unit Multifamily  1982     8
## 4 327420    56 WY            45 Weston County 3 & 4-Unit Multifamily  1981    20
## 5 327421    56 WY            45 Weston County 5+-Unit Multifamily     2000    10
## 6 327422    56 WY            45 Weston County 5+-Unit Multifamily     1981     8
# Merge the two data frames and total the permits per state and year.
#
# `zip_codes` actually holds the state lookup table (not zip codes); the name is
# kept so any later code that refers to it keeps working. `state_code` is
# converted to an integer key named `state` so it matches the numeric `state`
# column of the permit data.
zip_codes <- state_codes %>%
  mutate(state = as.integer(state_code))

# Keep only the "All Permits" rows before summing: the other `variable`
# categories (e.g. "2-Unit Multifamily") appear to be breakdowns of that total,
# so summing across every category would count the same permits more than once.
state_permits <- permit_data %>%
  filter(variable == "All Permits") %>%
  inner_join(zip_codes, by = "state") %>%
  group_by(state_name, year) %>%
  summarise(value = sum(value), .groups = "drop")
## NOTE: the printed output below was captured before the "All Permits" filter
## and `.groups = "drop"` were added; re-run the script to refresh it.
head(state_permits)
## # A tibble: 6 x 3
## # Groups:   state_name [1]
##   state_name  year value
##   <chr>      <dbl> <dbl>
## 1 Alabama     1980 38954
## 2 Alabama     1981 24592
## 3 Alabama     1982 21454
## 4 Alabama     1983 43537
## 5 Alabama     1984 37676
## 6 Alabama     1985 41725
# The first plot shows the overall permit data by state: one line per state,
# with a dotted vertical marker at 2008, rendered as an interactive plotly
# widget.
state_permit_plot <- ggplot(
  data = state_permits,
  aes(x = year, y = value / 1000, color = state_name)
) +
  # `text` is not a ggplot2 aesthetic (hence the warning below); it is supplied
  # so the state name is available to the plotly tooltip.
  geom_point(aes(text = paste("State:", state_name))) +
  geom_line() +
  geom_vline(xintercept = 2008, linetype = "dotted") +
  labs(
    x = "Time in (Years)",
    # The y aesthetic is value / 1000, so the axis is in thousands of permits.
    y = "Number of Permits (thousands)",
    title = "Permits by State"
  ) +
  theme_bw() +
  theme(legend.position = "none")
## Warning: Ignoring unknown aesthetics: text
ggplotly(state_permit_plot)
# I wish there were a better way to graph the data without all the states overlapping.
# I understand that making the plot interactive lets us see which state each data point belongs to.
# We can see that California issued the most permits in 1986. What I find interesting are the top three.
# Looking at the top three, we can see that Florida, Texas, and California had growing populations,
# so we should look at how they compare individually over time.
# Top three states: restrict the state totals to Florida, California and Texas
# and plot them on their own so the individual trends are readable.
Top_three <- filter(
  state_permits,
  state_name %in% c("Florida", "California", "Texas")
)

Plot_top_three <- ggplot(
  data = Top_three,
  aes(x = year, y = value / 1000, color = state_name)
) +
  geom_point() +
  geom_line() +
  geom_vline(xintercept = 2008, linetype = "dotted") +
  labs(
    x = "Time",
    # The y aesthetic is value / 1000, so the axis is in thousands of permits.
    y = "Number of Permits (thousands)",
    title = "Top 3 Permit States"
  ) +
  theme_bw()
ggplotly(Plot_top_three)
# After adding the 2008 market crash to the plot, we can see that these three big states
# took a big hit. They all continued to drop after the crash, even though these three states have
# the highest populations in the United States.